**Housekeeping
clear all
cap log close
log using ${rep_root}/logs/check_data_restriction.log, text replace
set more off

insheet using "${rep_root}/data/FOIA/CLEAR CR Status Data.csv" 
tempfile cr_temp
save `cr_temp'

clear
insheet using "${rep_root}/data/FOIA/CLEAR INFO Status Data.csv" 
append using `cr_temp'

tab status

  

//order chronologically
gen ampm = substr(status_time, -2,2)
tab ampm
gen hr = substr(status_time, 1, strpos(status_time,":")-1)
tab hr
gen min = substr(status_time, strpos(status_time,":")+1, 2)
destring hr min, replace
replace hr = hr + 12 if hr<12 & ampm=="PM" 
replace hr = 0 if hr==12 & ampm=="AM"



gen status_time_str = status_date+ " " + status_time
gen status_dt = date(status_date, "MDY", 2025)
gen status_yr = year(status_dt) 
gen status_month = month(status_dt) 
gen dow = dow(status_dt)
sort cr_number status_dt hr min    

**Get intake time for each case
preserve
sort cr_number status_dt hr min
by cr_number: keep if _n==1

keep cr_number status_dt hr min dow
rename status_dt intake_dt
rename hr hr_intake
rename min min_intake
rename dow dow_intake

tempfile intake
save `intake'
restore
 


sort cr_number
merge m:1 cr_number using `intake', nogen

 

**Create wide list of staff marked as pending assign team
preserve
keep if status=="PENDING ASSIGN TEAM"

sort cr_number
by cr_number: gen idx = _n
rename created_by intake_sup_

sum idx
local idx_max = r(max)

keep cr_number idx intake_sup_
reshape wide intake_sup_, i(cr_number) j(idx)

tempfile intake_wide
save `intake_wide'
restore 
 

***********************
**Bring in data on PERSONNEL WHO ARE NOT REALLY SUPERVISORS USING A VERIFIED LIST
***********************
 

//bring in list of investigating supervisors, and filter out those that are not. 
rename created_by status_sup  
replace status_sup = "SHANNON B HAYES" if status_sup == "SHANNON HAYES" //correct data error in name
 

preserve
clear
insheet using ${rep_root}/data/supervisors_identified.csv
tempfile sup_identified
save `sup_identified'
restore

merge m:1 status_sup using `sup_identified'
replace sup_identified = 0 if sup_identified !=1 
drop _merge


**identify whether the first supervisor is an identified supervisor
preserve
keep if status == "PENDING INVESTIGATION"
sort cr_number status_dt hr min  
by cr_number: keep if _n == 1
keep cr_number sup_identified
rename sup_identified first_sup 
tempfile first_sup
save `first_sup'
restore
merge m:1 cr_number using `first_sup', nogen
replace first_sup = 0 if first_sup!=1


keep if inlist(agency, "IPRA", "OPS", "COPA")

**Restrict to years where cases were being randomly assigned
gen intake_yr = year(intake_dt) 
gen intake_mo = month(intake_dt)

 
 
keep if inrange(intake_yr, 2006, 2007) | inrange(intake_yr, 2015, 2018) | (intake_yr==2008 & intake_mo<=7) | (intake_yr==2014 & intake_mo>=9)   
 
 
 

gen intake_yearmo = ym(intake_yr, intake_mo)
format %tm intake_yearmo 

gen lead_team = status == "PENDING INVESTIGATION"

by cr_number, sort: egen assigned_team = max(lead_team)

by cr_number, sort: egen identified_sup = max(first_sup)


preserve
collapse (max) assigned_team (max) first_sup, by(cr_number intake_dt)

tab assigned_team //there are 9973 complaints that are brought in the status; 2,052 are not sent to investigation team
restore
 

preserve
//take the person who is in the role of "waiting for their investigator to investigate"  

keep if status == "PENDING INVESTIGATION"


 
//take the first supervising investigator
sort cr_number status_dt hr min  
by cr_number: keep if _n == 1


//drop if not an identified supervisor
drop if sup_identified !=1



keep cr_number status_sup intake_dt //sup_identified

tab status_sup

sort cr_number
merge 1:1 cr_number using `intake_wide', keep(1 3) nogen

gen byintake = 0 

forvalues i = 1/`idx_max'{
	//drop if status_sup==intake_sup_`i'
	replace byintake = 1 if status_sup==intake_sup_`i'
}





sort status_sup 
egen inv_status_id = group(status_sup) 

gen sup_sample = 1

tempfile inv_in
save `inv_in'
restore

merge m:1 cr_number using `inv_in'

keep if _merge == 3 | _merge == 1
drop _merge

collapse (max) assigned_team (max) sup_sample (max) byintake, by(cr_number intake_dt)


rename cr_number cr_id
tempfile status
save `status'


 
 
 
 
***************************************************************************** 
**Restrict data from the the complaints data: we start with the raw data that has gibberish names
 
 
 
 
 

***********************
**MERGE SUPERVISORS WITH COMPLAINTS DATA
***********************

*******************
*BRING IN COMPLAINTS DATA
*******************
**Read main complants data
use ${rep_root}/data/complaints_2021, clear


tempfile complaints_old
save `complaints_old'		   

**Read recent complaints data
use ${rep_root}/data/complaints_cms, clear

**Standardize finding variable across datasets
rename recommended_finding finding_cd
replace finding_cd = upper(finding_cd) 

**Add back main complaints data
append using `complaints_old'		   

**Store officer name
gen fname = trim(acc_fname)
gen lname = trim(acc_lname)
gen mi = trim(acc_mi)

*Store officer appointment year
gen appoint_yr = year(acc_appoint_dt)
gen appoint_m = month(acc_appoint_dt)
gen appoint_d = day(acc_appoint_dt)		   

**Store officer birthyear
gen birthyear = acc_byr

**Standardize form of missing middle initial
replace mi = "" if mi == "NULL"


 
 
**RESTRICT TO SAMPLES WITH RANDOMIZATION
  



 
 
**Restrict to cases investigated by an IPRA, OPS, or COPA supervisor
keep if inlist(investigating_agency, "IPRA", "OPS", "COPA")
*keep if investigating_agency=="IPRA"
//keep if inv_supervisor==1 
//drop if inv_simul_assign==1
tab finding_cd

gen comp_yr = year(inc_complaint_dt_1)
gen comp_mo = month(inc_complaint_dt_1)
gen comp_yearmo = ym(comp_yr, comp_mo)
format %tm comp_yearmo 

count if comp_yearmo<ym(2006,9) //23,629

gen close_yr = year(inc_closed_dt_1)
drop if close_yr==1999

 
tab finding_cd
merge m:1 cr_id using `status'

replace sup_sample = 0 if sup_sample == .
replace assigned_team = 0 if assigned_team == .
replace byintake = 0 if byintake == .

//it seems status data are all matched to the complaint register data

tab comp_yearmo if _merge == 1

**Restrict to years where cases were being randomly assigned
gen intake_yr = year(intake_dt) 
gen intake_mo = month(intake_dt)
gen intake_yearmo = ym(intake_yr, intake_mo)
format %tm intake_yearmo 

tab _merge // total of 57649 (_merge == 1 and _merge == 3)
 
gen drop_period = !(inrange(intake_yr, 2006, 2007) | inrange(intake_yr, 2015, 2018) | (intake_yr==2008 & intake_mo<=7) | (intake_yr==2014 & intake_mo>=9))   
 
 
 



tab _merge //3741 doesn't have case information
tab assigned_team _merge
tab sup_sample 

tab byintake




preserve
keep if _merge == 3


drop if drop_period 
count if byintake ==0 & sup_sample ==1
tab sup_sample
**Define some covariates
gen po = acc_rank == "POLICE OFFICER"
gen minor_inj = vic_inj_1<=1
gen vic_old = (year(inc_complaint_dt_1) - vic_byr_oldest)>=35 & !missing(vic_byr_oldest)
gen acc_old = acc_age>=35 & !missing(acc_age)
gen cat_3 = acc_cat==3
gen cat_4 = acc_cat==4
sum vic_female_any vic_white_any acc_white acc_male acc_old cat_3 cat_4

gen year = year(intake_dt)
tab byintake year
 
eststo clear 
eststo all: estpost summarize vic_female_any vic_white_any acc_white acc_male   cat_3 cat_4  
eststo dropintake1: estpost summarize vic_female_any vic_white_any acc_white acc_male    cat_3 cat_4   if byintake ==1 
eststo dropintake2: estpost summarize vic_female_any vic_white_any acc_white acc_male  cat_3 cat_4   if byintake ==0 
eststo dropintake_sup_sample1:  estpost summarize vic_female_any vic_white_any acc_white acc_male   cat_3 cat_4   if sup_sample ==1 & byintake == 0 
eststo dropintake_sup_sample2:  estpost summarize vic_female_any vic_white_any acc_white acc_male cat_3 cat_4   if sup_sample ==0 & byintake == 0 


eststo diffa:  estpost ttest vic_female_any vic_white_any acc_white acc_male   cat_3 cat_4  , by(byintake) unequal
eststo diffb:  estpost ttest vic_female_any vic_white_any acc_white acc_male   cat_3 cat_4 if byintake==0 , by(sup_sample) unequal
 
 

esttab  all dropintake1 dropintake2 dropintake_sup_sample1 dropintake_sup_sample2 diffa diffb using "${rep_root}/output/tables/descriptive_drops.tex" ,  replace tex coeflabel(vic_female_any "Any Female Victim" vic_white_any "Any White Victim" acc_white "Accused Officer White" acc_male "Accused Officer Male" cat_3 "Civil Rights Complaint" cat_4 "Force Complaint") main(mean %9.3gc) aux(sd %9.3gc) collabels(none) cells("mean(pattern(1 1 1 1 1 0 0  ) fmt(2)) b(star pattern(0 0 0 0 0 1 1) fmt(%9.3f))" "sd(pattern(1 1 1 1 1 0 0) par fmt(%9.3f))  se(pattern(0 0 0 0 0 1 1) par fmt(%9.3f))") ///
modelwidth(20) mgroups("Mean (SD)" "Differences", ///
	pattern(1 0 0 0 0 1 0 ) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span}))  mtitles("Full" "Non-Intake" "Intake" "Identified Supervisors" "Non-Identified Supervisors"  "Col 2 vs 3" " Col4 vs 5" )   starlevels(* 0.10 ** 0.05 *** 0.01) stats(N, labels("Observations") fmt(%9.3gc)) 
 
  





restore




 

preserve
 keep if _merge == 1 | _merge == 3
  drop if comp_yr>=2019 //prior to that, sparse victim data
   tab _merge

 keep if comp_yearmo>=ym(2006,9) //prior to that, sparse victim data
 
 tab _merge
count if byintake ==0 & sup_sample ==1
tab sup_sample

 
gen period = 1 if drop_period == 0 & comp_yr<=2008
replace period = 2 if drop_period == 1 & comp_yr>2008 & comp_yr<=2015
replace period = 3 if drop_period == 0 & comp_yr>=2014

 

**Define some covariates
gen po = acc_rank == "POLICE OFFICER"
gen minor_inj = vic_inj_1<=1
gen vic_old = (year(inc_complaint_dt_1) - vic_byr_oldest)>=35 & !missing(vic_byr_oldest)
gen acc_old = acc_age>=35 & !missing(acc_age)
gen cat_3 = acc_cat==3
gen cat_4 = acc_cat==4
sum vic_female_any vic_white_any acc_white acc_male acc_old cat_3 cat_4

gen year = year(intake_dt)
tab byintake year
 
eststo clear 
eststo all: estpost summarize vic_female_any vic_white_any acc_white acc_male   cat_3 cat_4  
eststo sample0: estpost summarize vic_female_any vic_white_any acc_white acc_male  cat_3 cat_4   if drop_period ==1 
eststo sample1: estpost summarize vic_female_any vic_white_any acc_white acc_male    cat_3 cat_4   if drop_period ==0

 

 

 
eststo sample4: estpost summarize vic_female_any vic_white_any acc_white acc_male  cat_3 cat_4   if period == 1
eststo sample5: estpost summarize vic_female_any vic_white_any acc_white acc_male    cat_3 cat_4   if period == 3
 

eststo diffa:  estpost ttest vic_female_any vic_white_any acc_white acc_male   cat_3 cat_4  , by(drop_period) unequal
 
 

esttab  all sample0 sample1   sample4 sample5 diffa  using "${rep_root}/output/tables/descriptive_sample_period.tex" ,  replace tex coeflabel(vic_female_any "Any Female Victim" vic_white_any "Any White Victim" acc_white "Accused Officer White" acc_male "Accused Officer Male" cat_3 "Civil Rights Complaint" cat_4 "Force Complaint") main(mean %9.3gc) aux(sd %9.3gc) collabels(none) cells("mean(pattern(1 1 1 1 1  0     ) fmt(2)) b(star pattern(0 0 0 0 0   1  ) fmt(%9.3f))" "sd(pattern(1 1 1 1 1  0    ) par fmt(%9.3f))  se(pattern(0 0 0 0 0 1 ) par fmt(%9.3f))") ///
modelwidth(20) mgroups("Mean (SD)" "Differences"  , ///
	pattern(1 0 0 0 0  1   ) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span}))  mtitles("2006-2018" "Non-Sample Period"  "Sample Period"  "Sep 2006-Jul 2008" "Sep 2014-Dec 2018"   "Col 2 vs 3"  )   starlevels(* 0.10 ** 0.05 *** 0.01) stats(N, labels("Observations") fmt(%9.3gc)) 
 
  
 

restore




keep if _merge == 3
drop if drop_period





*************Drop 1
drop if byintake ==1
 
tab assigned_team if _merge !=2 //all the entries in the complaints data have supervisors assigned
tab sup_sample assigned_team if _merge !=2


*************Drop 2
keep if sup_sample == 1

 




tab _merge  //all from the complaints data have merged status that have investigative status
 

 
 
drop _merge





 

drop inv_id 
 



gen nolname = lname == ""
tab nolname finding
 
tab nolname
tab finding_cd nolname

gen namedrop = 0
**Clean some clearly errouneous name information
sort fname lname mi appoint_yr appoint_m appoint_d birthyear 
replace namedrop = 1 if lname == ""
replace namedrop = 1 if fname == ""
replace namedrop = 1 if fname == "Unknown" 
replace namedrop = 1 if substr(fname,1,1)<"A"
replace namedrop = 1 if substr(fname,1,1)>"Z"
replace namedrop = 1 if fname == "White man"
replace namedrop = 1 if fname == "Will county"
replace namedrop = 1 if fname == "Wouldn't identify themselves"
replace namedrop = 1 if fname == "Test"
replace namedrop = 1 if fname == "Tall"
replace namedrop = 1 if fname == "Toney with a Y"
replace namedrop = 1 if fname == "U FIGURE"
replace namedrop = 1 if fname == "UNKNOWN"
replace namedrop = 1 if fname == "UNK"
replace namedrop = 1 if fname == "UN"
replace namedrop = 1 if fname == "The Entire Chicago Police"
replace namedrop = 1 if strpos(fname, "Unknown")
replace namedrop = 1 if strpos(fname, "Unknwn")
replace namedrop = 1 if fname == "Any"
replace namedrop = 1 if fname == "B"
replace namedrop = 1 if fname == "Chicago"
replace namedrop = 1 if fname == "Chicago land alldirection Northsouth east west."
replace namedrop = 1 if fname == "Chief"
replace namedrop = 1 if fname == "City of Chicago"
replace namedrop = 1 if fname == "Cop"
replace namedrop = 1 if fname == "Cumsucking pig"
replace namedrop = 1 if fname == "Kenneth Capp _ James Entree 9th District Police Corruption"
replace namedrop = 1 if strpos(fname, "No police")
replace namedrop = 1 if strpos(fname, "None")
replace namedrop = 1 if strpos(fname, "Applicable")
replace namedrop = 1 if strpos(fname, "Not")
replace namedrop = 1 if strpos(fname, "Noone")
replace namedrop = 1 if strpos(fname, "Online")
replace namedrop = 1 if strpos(fname, "Refused")
replace namedrop = 1 if strpos(fname, "River Place")
replace namedrop = 1 if strpos(fname, "Dumb")
replace namedrop = 1 if strpos(fname, "I do")
replace namedrop = 1 if strpos(lname, "Kingdom")
replace namedrop = 1 if strpos(fname, "Many unknown")
replace namedrop = 1 if strpos(fname, "N/")
replace namedrop = 1 if strpos(fname, "NA")
replace namedrop = 1 if strpos(fname, "Nine")
replace namedrop = 1 if strpos(fname, "No Department")
replace namedrop = 1 if strpos(fname, "Name")
replace namedrop = 1 if fname == "Na" 
replace namedrop = 1 if fname == "N" 
replace fname = subinstr(fname, "Officer", "", .)
replace fname = subinstr(fname, "Sergeant", "", .)
replace fname = subinstr(fname, "Sgt", "", .)
replace fname = subinstr(fname, "Mr.", "", .)
replace fname = "" if fname == "Mr"
replace namedrop = 1 if strpos(fname, "SHOULD NOT")
replace namedrop = 1 if strpos(fname, "Some")
replace namedrop = 1 if strpos(fname, "Chicago")
replace namedrop = 1 if lname == "Doe female shooter jumpertyee misconduct"
replace namedrop = 1 if lname == "?"
replace namedrop = 1 if lname == "Iiiiiii"
replace namedrop = 1 if strpos(lname, "Doe ")
replace namedrop = 1 if strpos(lname, "Does")
replace namedrop = 1 if strpos(lname, "N/a")
replace namedrop = 1 if strpos(fname, "John doe")
replace namedrop = 1 if strpos(lname, "a Few")
replace namedrop = 1 if strpos(lname, "Alpesh and Carlton Nelson and Debra and nancy")
replace namedrop = 1 if strpos(lname, "Daley  Standing Office at Mayor")
replace namedrop = 1 if strpos(fname, "St Charles Parish")
replace namedrop = 1 if strpos(lname, "Annas baby daddy the green house on with the flags on lincoln and sacremento")
replace namedrop = 1 if strpos(lname, "Population")
replace namedrop = 1 if strpos(fname, "Unknow")
replace namedrop = 1 if strpos(fname, "Unkown")
replace namedrop = 1 if strpos(fname, "Upstairs")
replace namedrop = 1 if strpos(fname, "See Video")
replace namedrop = 1 if strpos(fname, "Morgan park at")

//some obs have variations of last names: deflate last names
gen lname_mod = subinstr(lname, "-", "", .)
replace lname_mod = subinstr(lname_mod, " ", "", .)
replace lname_mod = subinstr(lname_mod, ".", "", .)

 
gen cat_3 = acc_cat==3
gen cat_4 = acc_cat==4

eststo clear 
eststo all: estpost summarize vic_female_any vic_white_any acc_white acc_male   cat_3 cat_4  
eststo sample0: estpost summarize vic_female_any vic_white_any acc_white acc_male  cat_3 cat_4   if namedrop ==1 
eststo sample1: estpost summarize vic_female_any vic_white_any acc_white acc_male    cat_3 cat_4   if namedrop ==0
 eststo diffa:  estpost ttest vic_female_any vic_white_any acc_white acc_male   cat_3 cat_4  , by(namedrop) unequal

esttab  all sample0 sample1     diffa  using "${rep_root}/output/tables/descriptive_noname.tex" ,  replace tex coeflabel(vic_female_any "Any Female Victim" vic_white_any "Any White Victim" acc_white "Accused Officer White" acc_male "Accused Officer Male" cat_3 "Civil Rights Complaint" cat_4 "Force Complaint") main(mean %9.3gc) aux(sd %9.3gc) collabels(none) cells("mean(pattern(1 1 1    0     ) fmt(2)) b(star pattern(0 0 0     1  ) fmt(%9.3f))" "sd(pattern(1 1 1    0    ) par fmt(%9.3f))  se(pattern(0 0   0 1 ) par fmt(%9.3f))") ///
modelwidth(20) mgroups("Mean (SD)" "Differences"  , ///
	pattern(1 0 0 0 0  1   ) prefix(\multicolumn{@span}{c}{) suffix(}) span erepeat(\cmidrule(lr){@span}))  mtitles("Sample Before Dropping No Identified Officers" "No Identified Officers"  "Identified Officers"     "Col 2 vs 3"  )   starlevels(* 0.10 ** 0.05 *** 0.01) stats(N, labels("Observations") fmt(%9.3gc)) 
 

drop if namedrop == 1

//merge mid that we attached to complaints data
preserve
local ver_suff _9_21
use ${rep_root}/data/FOIA/FOIA_unified/complaints ,clear
 
by lname_mod fname mi acc_byr acc_appoint_dt cr_id acc_id, sort: keep if _n ==1
tempfile comp_mid
save `comp_mid'
restore
 
merge m:1 cr_id acc_id lname_mod fname mi acc_byr acc_appoint_dt using `comp_mid' ,keepusing(mid)
keep if _merge == 3
drop _merge

count if mid == .
 








**Drop shootings, domestic violence, and non-civillian complaints
drop if inc_shooting_1==1 | (vic_inj_1>=3 & !missing(vic_inj_1))
drop if inc_any_domestic==1
drop if inc_civilian_1==0

**Drop if missing crucial data
drop if missing(vic_race_1) | missing(vic_male_1) | missing(acc_white) | missing(acc_male) | missing(acc_age) | missing(inc_district_1) | missing(finding_cd)
tab finding_cd


**Drop non-POs
drop if acc_rank != "POLICE OFFICER"  

**Keep only cases where the accused officer made at least one arrest at some point and we have outcome data
sort mid
merge m:1 mid using ${rep_root}/data/mids_with_arrests, keep(1 3)  //this drops 13 observations

  